#library(rmdformats)
#downcute(
#fig_width = 8,
#fig_height = 5,
#fig_caption = TRUE,
#lightbox = FALSE,
#thumbnails = FALSE,
#gallery = FALSE,
#toc_depth = 2,
#embed_fonts = TRUE,
#use_bookdown = FALSE,
#pandoc_args = NULL,
#md_extensions = NULL,
#mathjax = "rmdformats",
#highlight = NULL,
# default_style = c("light", "dark"),
# downcute_theme = c("default", "chaos")
#)Add to headers to have dropdowns
Packages
Exploratory Data Analysis
- Variables to consider:
- date
- open
- high
- low
- close
- volume (volume of shares that are transacted)
- adjusted (adjusted close price of stock)
DraftKings
Ticker - DKNG
# Pull DKNG price data from 2015-01-01 onward and store it as a tsibble
# indexed by date.
draftK <- tq_get("DKNG", from = "2015-01-01") %>%
  as_tsibble(index = date)

# Adjusted close price over time
draftK %>%
  autoplot(adjusted) +
  labs(
    title = "DraftKings Adjusted Price",
    subtitle = " ",
    x = "Year (1D)",
    y = "Adjusted Stock Price ($)"
  ) +
  theme_minimal()

# DraftKings Volume
draftK %>%
  autoplot(volume) +
  labs(
    title = "DraftKings Volume",
    subtitle = " ",
    x = "Year (1D)",
    y = "Trade Volume"
  ) +
  theme_minimal()
# A tsibble: 1 x 8 [1D]
symbol date open high low close volume adjusted
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 DKNG 2021-09-20 58.0 58.9 56.0 57 11236700 57
- Takeaways
- Volume spikes between 07/20 and roughly 11/20
- Other notes:
- Volume = 10,769,200
Flutter Entertainment (FanDuel)
Ticker - PDYPY
# Pull PDYPY (Flutter Entertainment / FanDuel) price data from 2018-01-01
# onward and store it as a tsibble indexed by date.
fanDuel <- tq_get("PDYPY", from = "2018-01-01") %>%
  as_tsibble(index = date)

# FanDuel adjusted close price
fanDuel %>%
  autoplot(adjusted, color = "#0072B2") +
  labs(
    title = "FanDuel Adjusted Price",
    subtitle = " ",
    x = "Year (1D)",
    y = "Adjusted Stock Price ($)"
  ) +
  theme_minimal()

# FanDuel volume
fanDuel %>%
  autoplot(volume, color = "#0072B2") +
  labs(
    title = "FanDuel Volume",
    subtitle = " ",
    x = "Year (1D)",
    y = "Trade Volume"
  ) +
  theme_minimal()
# A tsibble: 1 x 8 [1D]
symbol date open high low close volume adjusted
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 PDYPY 2021-09-20 101. 102. 100. 100. 22600 100.
- Notes:
- Acquired by Paddy Power Betfair in 2018 (now known as Flutter Entertainment)
- HUGE spikes in volume after 2020
- Large jump after covid
- V-Shape recovery dipped at the beginning of 2021
- Volume = 11,900
MGM Resorts
Ticker - MGM
# Pull MGM price data from 2018-01-01 onward and store it as a tsibble
# indexed by date.
MGM <- tq_get("MGM", from = "2018-01-01") %>%
  as_tsibble(index = date)

# MGM adjusted close price
MGM %>%
  autoplot(adjusted, color = "red") +
  labs(
    title = "MGM Adjusted Price",
    subtitle = " ",
    x = "Year (1D)",
    y = "Adjusted Stock Price ($)"
  ) +
  theme_minimal()

# MGM Volume
MGM %>%
  autoplot(volume, color = "red") +
  labs(
    title = "MGM Volume",
    subtitle = " ",
    x = "Year (1D)",
    y = "Trade Volume"
  ) +
  theme_minimal()
# A tsibble: 1 x 8 [1D]
symbol date open high low close volume adjusted
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 MGM 2021-09-20 40.7 42.0 40.5 41.3 6657000 41.3
- Notes:
- Public since the 70’s
- Large jump in volume after 2020
- Volume = 8,184,000
Penn National Gaming
Ticker - PENN
# Pull PENN price data from 2015-01-01 onward and store it as a tsibble
# indexed by date.
PENN <- tq_get("PENN", from = "2015-01-01") %>%
  as_tsibble(index = date)

# PENN adjusted close price
PENN %>%
  autoplot(adjusted, color = "#D55E00") +
  labs(
    title = "PENN Adjusted Price",
    subtitle = " ",
    x = "Year (1D)",
    y = "Adjusted Stock Price ($)"
  ) +
  theme_minimal()

# PENN Volume
PENN %>%
  autoplot(volume, color = "#D55E00") +
  labs(
    title = "PENN Volume",
    subtitle = " ",
    x = "Year (1D)",
    y = "Trade Volume"
  ) +
  theme_minimal()
# A tsibble: 2 x 8 [1D]
symbol date open high low close volume adjusted
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 PENN 2021-09-17 76.1 77.6 73.6 74.6 6555000 74.6
2 PENN 2021-09-20 72.3 74.7 71.6 73.4 4460800 73.4
- Notes:
- Otherwise known as Barstool Sports
- Largest spike since covid, out of all stocks
- Dipped in Feb 2021, but recovering
- Volume = 3,963,700
Caesars Entertainment
Ticker - CZR
# Pull CZR price data from 2018-01-01 onward and store it as a tsibble
# indexed by date.
CZR <- tq_get("CZR", from = "2018-01-01") %>%
  as_tsibble(index = date)

# CZR adjusted close price
CZR %>%
  autoplot(adjusted, color = "#009E73") +
  labs(
    title = "CZR Adjusted Price",
    subtitle = " ",
    x = "Year (1D)",
    y = "Adjusted Stock Price ($)"
  ) +
  theme_minimal()

# CZR Volume
CZR %>%
  autoplot(volume, color = "#009E73") +
  labs(
    title = "CZR Volume",
    subtitle = " ",
    x = "Year (1D)",
    y = "Trade Volume"
  ) +
  theme_minimal()
# A tsibble: 1 x 8 [1D]
symbol date open high low close volume adjusted
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 CZR 2021-09-20 103. 106. 102. 105. 2662700 105.
- Notes:
- Odd volume spike very recently
- V-shape recovery and then some
- Volume = 3,601,800
Tidy Data
- Combining DraftKings, FanDuel, MGM, PENN, CZR from January 1st, 2018
- First pulling the data
- Issue with trading days (all starting at different dates), so creating variable PENNdays (Longest stock to pick from)
- Filtering all stocks for when they started sports gambling
- Joining everything together
# Pull all five tickers in one request (from 2018-01-01 onward) and store the
# result as a tsibble keyed by symbol and indexed by date.
sportGam <- tq_get(
  c("DKNG", "PDYPY", "MGM", "PENN", "CZR"),
  from = "2018-01-01"
) %>%
  as_tsibble(key = symbol, index = date)
# Build a date -> trading_day lookup from the PENN rows after 2018-05-14:
# each remaining PENN date gets its row position as the trading-day number.
PENNdays <- sportGam %>%
  as_tibble() %>%
  filter(symbol == "PENN", date > as.Date("2018-05-14")) %>%
  select(date) %>%
  mutate(trading_day = row_number())
# Keep each symbol only after its own start date.
# Every (symbol, start date) pair is wrapped in its own parentheses so the
# grouping is explicit and does not depend on `&` binding tighter than `|`.
Use.Stocks <- sportGam %>%
  as_tibble() %>%
  filter(
    (symbol == "DKNG" & date > as.Date("2019-12-05")) |
      (symbol == "PDYPY" & date > as.Date("2020-12-01")) |
      (symbol == "MGM" & date > as.Date("2019-09-05")) |
      (symbol == "PENN" & date > as.Date("2018-05-14")) |
      (symbol == "CZR" & date > as.Date("2018-09-06"))
  )
# Attach the trading_day counter from PENNdays to every stock row, then
# rebuild a tsibble keyed by symbol and indexed by trading_day.
# The join key is spelled out (`by = "date"`) so the match does not silently
# change if another shared column is ever added to either table.
Model.Me <- Use.Stocks %>%
  left_join(PENNdays, by = "date") %>%
  as_tsibble(
    index = trading_day,
    key = symbol,
    regular = TRUE
  )
# Volume of every symbol against the shared trading-day index, coloured with
# the cPal palette (defined outside this chunk).
Model.Me %>%
  autoplot(volume) +
  scale_color_manual(values = cPal) +
  labs(
    title = "",
    y = "Volume",
    x = "Trading Days"
  ) +
  theme_minimal()
Train & Test
- Splitting train and test data sets in order to produce forecasts 2 weeks (14 trading days) out.
# Hold out the 14 highest trading_day rows of each symbol as the test set and
# restore the tsibble structure afterwards.
test <- Model.Me %>%
  group_by(symbol) %>%
  slice_max(order_by = trading_day, n = 14) %>%
  ungroup() %>%
  as_tsibble(key = symbol, index = trading_day)

## Checking test
test
# A tsibble: 70 x 9 [1]
# Key: symbol [5]
symbol date open high low close volume adjusted trading_day
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 CZR 2021-08-31 102. 104. 99.7 102. 2196300 102. 831
2 CZR 2021-09-01 103. 103. 101. 102. 1813500 102. 832
3 CZR 2021-09-02 103. 105. 101. 104. 1673500 104. 833
4 CZR 2021-09-03 103. 105. 102. 104. 1446100 104. 834
5 CZR 2021-09-07 104 108. 103. 107. 2536900 107. 835
6 CZR 2021-09-08 107. 108. 103. 106. 1742000 106. 836
7 CZR 2021-09-09 107. 109. 106. 107. 2556900 107. 837
8 CZR 2021-09-10 107. 107. 104. 104. 1741300 104. 838
9 CZR 2021-09-13 106. 106. 101. 104. 1628700 104. 839
10 CZR 2021-09-14 103. 104. 101. 103. 1415800 103. 840
# … with 60 more rows
# Training set = every Model.Me row that is not in the test set.
# Rows are matched on the tsibble key and index (symbol, trading_day) instead
# of on every shared column, so the split does not rely on exact equality of
# the numeric price/volume columns.
train <- anti_join(
  Model.Me,
  test,
  by = c("symbol", "trading_day")
) %>%
  as_tsibble(
    index = trading_day,
    key = symbol
  )

## Checking train
train
# A tsibble: 2,703 x 9 [1]
# Key: symbol [5]
symbol date open high low close volume adjusted trading_day
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 CZR 2018-09-07 45.3 46.2 45.0 45.3 1203300 45.3 81
2 CZR 2018-09-10 45.7 46.2 45.1 46.0 853100 46.0 82
3 CZR 2018-09-11 46.0 46.8 45.2 46.7 971500 46.7 83
4 CZR 2018-09-12 46.8 48.2 46.3 48.0 711700 48.0 84
5 CZR 2018-09-13 48.0 49.6 48.0 49 799700 49 85
6 CZR 2018-09-14 49.2 49.8 48.0 48.8 1390600 48.8 86
7 CZR 2018-09-17 48.8 49.2 47.5 47.8 806800 47.8 87
8 CZR 2018-09-18 47.9 48.4 47.5 48.3 725700 48.3 88
9 CZR 2018-09-19 48.5 49 47 47.3 640700 47.3 89
10 CZR 2018-09-20 47.7 48.5 47.2 48.5 687500 48.5 90
# … with 2,693 more rows
Visualizing Adjusted Prices & Volume
Comparing All Adjusted Stock Prices
# Adjusted prices of all symbols on the shared trading-day axis
ggplot(train, aes(x = trading_day, y = adjusted, color = symbol)) +
  geom_line() +
  scale_color_manual(values = cPal) +
  labs(
    title = "Comparing Sports Gambling Stocks Adjusted Prices",
    x = "Trading Day",
    y = "Stock Price ($)"
  ) +
  theme_minimal()

# Logged Adjusted prices
ggplot(train, aes(x = trading_day, y = log(adjusted), color = symbol)) +
  geom_line() +
  scale_color_manual(values = cPal) +
  labs(
    title = "Comparing Sports Gambling Stocks",
    subtitle = "Log Adjusted Prices",
    x = "Trading Day",
    y = "Stock Price (Log)"
  ) +
  theme_minimal()
Comparing All Trading Volumes
# Trading volume of all symbols on the shared trading-day axis
ggplot(train, aes(x = trading_day, y = volume, color = symbol)) +
  geom_line() +
  scale_color_manual(values = cPal) +
  labs(
    title = "Comparing Sports Gambling Stocks Trading Volume",
    x = "Trading Day",
    y = "Trade Volume"
  ) +
  theme_minimal()

# Comparing logged volumes
ggplot(train, aes(x = trading_day, y = log(volume), color = symbol)) +
  geom_line() +
  scale_color_manual(values = cPal) +
  labs(
    title = "Comparing Sports Gambling Stocks",
    subtitle = "Logged Volume",
    x = "Trading Day",
    y = "Trade Volume (Log)"
  ) +
  theme_minimal()
Comparing all Volumes
# Facet volume: one panel per symbol, each with its own y scale.
# `FALSE` is written out because `F` is an ordinary variable that can be
# reassigned.
train %>%
  ggplot(aes(trading_day, volume, color = symbol)) +
  geom_line(show.legend = FALSE) +
  scale_color_manual(values = cPal) +
  labs(
    title = "Comparing Sports Gambling Stocks Volumes",
    y = "Trade Volume",
    x = "Trading Day"
  ) +
  facet_wrap(~symbol, scales = "free_y") +
  theme_minimal()

# Facet logged volume
train %>%
  ggplot(aes(trading_day, log(volume), color = symbol)) +
  geom_line() +
  scale_color_manual(values = cPal) +
  labs(
    title = "Comparing Sports Gambling Stocks",
    subtitle = "Logged Volume",
    y = "Trade Volume (Log)",
    x = "Trading Day"
  ) +
  facet_wrap(~symbol, scales = "free_y") +
  theme_minimal()
Decomposition
- Taking a look at the decompositions of each individual stock.
Comparing Decompositions
# STL decomposition of volume, fitted per symbol on the training data
dcmpVol <- train %>%
  model(stl = STL(volume))

# Print the volume decomposition components
components(dcmpVol)
# A dable: 2,703 x 7 [1]
# Key: symbol, .model [5]
# : volume = trend + remainder
symbol .model trading_day volume trend remainder season_adjust
<chr> <chr> <int> <dbl> <dbl> <dbl> <dbl>
1 CZR stl 81 1203300 1196169. 7131. 1203300
2 CZR stl 82 853100 1197297. -344197. 853100
3 CZR stl 83 971500 1198426. -226926. 971500
4 CZR stl 84 711700 1199554. -487854. 711700
5 CZR stl 85 799700 1200683. -400983. 799700
6 CZR stl 86 1390600 1201811. 188789. 1390600
7 CZR stl 87 806800 1202940. -396140. 806800
8 CZR stl 88 725700 1204068. -478368. 725700
9 CZR stl 89 640700 1205196. -564496. 640700
10 CZR stl 90 687500 1206325. -518825. 687500
# … with 2,693 more rows
# Plot the volume decomposition components for all symbols
components(dcmpVol) %>%
  autoplot() +
  scale_color_manual(values = cPal) +
  labs(x = "Trading Day") +
  theme_minimal()

# STL decomposition of the adjusted price, fitted per symbol on the training data
dcmpAdj <- train %>%
  model(stl = STL(adjusted))

# Print the adjusted-price decomposition components
components(dcmpAdj)
# A dable: 2,703 x 7 [1]
# Key: symbol, .model [5]
# : adjusted = trend + remainder
symbol .model trading_day adjusted trend remainder season_adjust
<chr> <chr> <int> <dbl> <dbl> <dbl> <dbl>
1 CZR stl 81 45.3 50.7 -5.33 45.3
2 CZR stl 82 46.0 50.4 -4.45 46.0
3 CZR stl 83 46.7 50.1 -3.42 46.7
4 CZR stl 84 48.0 49.8 -1.89 48.0
5 CZR stl 85 49 49.6 -0.561 49
6 CZR stl 86 48.8 49.3 -0.430 48.8
7 CZR stl 87 47.8 49.0 -1.15 47.8
8 CZR stl 88 48.3 48.7 -0.419 48.3
9 CZR stl 89 47.3 48.4 -1.14 47.3
10 CZR stl 90 48.5 48.2 0.293 48.5
# … with 2,693 more rows
# Plot the adjusted-price decomposition components for all symbols
components(dcmpAdj) %>%
  autoplot() +
  scale_color_manual(values = cPal) +
  labs(x = "Trading Day") +
  theme_minimal()
CZR Decomposition
CZR Volume Decomposition
# CZR only: components of the volume STL decomposition
dcmpVol %>%
  components() %>%
  filter(symbol == "CZR") %>%
  autoplot(color = "#009E73") +
  labs(
    x = "Trading Day",
    title = "STL Decomposition of CZR Volume"
  ) +
  theme_minimal()
CZR Adjusted Stock Price Decomposition
# CZR only: components of the adjusted-price STL decomposition
dcmpAdj %>%
  components() %>%
  filter(symbol == "CZR") %>%
  autoplot(color = "#009E73") +
  labs(
    x = "Trading Day",
    title = "STL Decomposition of CZR Adjusted Price"
  ) +
  theme_minimal()
PENN Decomposition
PENN Volume Decomposition
# PENN only: components of the volume STL decomposition
dcmpVol %>%
  components() %>%
  filter(symbol == "PENN") %>%
  autoplot(color = "#D55E00") +
  labs(
    x = "Trading Day",
    title = "STL Decomposition of PENN Volume"
  ) +
  theme_minimal()
PENN Adjusted Stock Price Decomposition
# PENN only: components of the adjusted-price STL decomposition
dcmpAdj %>%
  components() %>%
  filter(symbol == "PENN") %>%
  autoplot(color = "#D55E00") +
  labs(
    x = "Trading Day",
    title = "STL Decomposition of PENN Adjusted Price"
  ) +
  theme_minimal()
MGM Decomposition
MGM Volume Decomposition
# MGM only: components of the volume STL decomposition
dcmpVol %>%
  components() %>%
  filter(symbol == "MGM") %>%
  autoplot(color = "red") +
  labs(
    x = "Trading Day",
    title = "STL Decomposition of MGM Volume"
  ) +
  theme_minimal()
MGM Adjusted Stock Price Decomposition
# MGM only: components of the adjusted-price STL decomposition
dcmpAdj %>%
  components() %>%
  filter(symbol == "MGM") %>%
  autoplot(color = "red") +
  labs(
    x = "Trading Day",
    title = "STL Decomposition of MGM Adjusted Price"
  ) +
  theme_minimal()
FanDuel Decomposition
Fanduel Volume Decomposition
# PDYPY (FanDuel) only: components of the volume STL decomposition
dcmpVol %>%
  components() %>%
  filter(symbol == "PDYPY") %>%
  autoplot(color = "#0072B2") +
  labs(
    x = "Trading Day",
    title = "STL Decomposition of FanDuel Volume"
  ) +
  theme_minimal()
Fanduel Adjusted Stock Price Decomposition
# PDYPY (FanDuel) only: components of the adjusted-price STL decomposition
dcmpAdj %>%
  components() %>%
  filter(symbol == "PDYPY") %>%
  autoplot(color = "#0072B2") +
  labs(
    x = "Trading Day",
    title = "STL Decomposition of FanDuel Adjusted Price"
  ) +
  theme_minimal()
DraftKings Decomposition
DraftKings Volume Decomposition
# DKNG only: components of the volume STL decomposition
dcmpVol %>%
  components() %>%
  filter(symbol == "DKNG") %>%
  autoplot() +
  labs(
    x = "Trading Day",
    title = "STL Decomposition of DraftKings Volume"
  ) +
  theme_minimal()
DraftKings Adjusted Stock Price Decomposition
# DKNG only: components of the adjusted-price STL decomposition
dcmpAdj %>%
  components() %>%
  filter(symbol == "DKNG") %>%
  autoplot() +
  labs(
    x = "Trading Day",
    title = "STL Decomposition of DraftKings Adjusted Price"
  ) +
  theme_minimal()
Modeling function
# fc.model: fit the candidate forecasting models to one outcome column.
#
# Arguments:
#   data    - table passed straight to model(); the callers in this file pass
#             a tsibble keyed by symbol.
#   Outcome - unquoted column name to model (captured with ensym()).
#
# Returns the model table with four columns of fits:
#   ARIMA   - ARIMA on log(Outcome + 0.1)
#   ARIMAD1 - the same, with pdq(d = 1)
#   ETS     - ETS on log(Outcome + 0.1)
#   Combo   - the average of ARIMA and ETS
# The 0.1 offset is the original author's workaround for the "log issue".
fc.model <- function(data, Outcome) {
  target <- ensym(Outcome)

  base_fits <- data %>%
    model(
      ARIMA = ARIMA(log(!!target + 0.1)),
      ARIMAD1 = ARIMA(log(!!target + 0.1) ~ pdq(d = 1)),
      ETS = ETS(log(!!target + 0.1))
    )

  base_fits %>%
    mutate(Combo = (ARIMA + ETS) / 2)
}
Model fit and Report
# Fit every candidate model to training volume, one set per symbol
fit <- fc.model(train, Outcome = volume)

# Summary report for the ARIMA column across all symbols
fit %>%
  select(ARIMA) %>%
  report()
# A tibble: 5 x 9
symbol .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 CZR ARIMA 0.187 -434. 875. 875. 894. <cpl [2]> <cpl [1]>
2 DKNG ARIMA 0.380 -406. 818. 818. 830. <cpl [1]> <cpl [1]>
3 MGM ARIMA 0.100 -132. 275. 275. 301. <cpl [3]> <cpl [2]>
4 PDYPY ARIMA 0.318 -156. 324. 324. 343. <cpl [1]> <cpl [3]>
5 PENN ARIMA 0.180 -465. 937. 937. 951. <cpl [1]> <cpl [1]>
ARIMA with 1 Differencing Report
# A tibble: 5 x 9
symbol .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 CZR ARIMAD1 0.187 -434. 875. 875. 894. <cpl [2]> <cpl [1]>
2 DKNG ARIMAD1 0.380 -406. 818. 818. 830. <cpl [1]> <cpl [1]>
3 MGM ARIMAD1 0.100 -132. 275. 275. 301. <cpl [3]> <cpl [2]>
4 PDYPY ARIMAD1 0.325 -158. 327. 328. 347. <cpl [1]> <cpl [4]>
5 PENN ARIMAD1 0.180 -465. 937. 937. 951. <cpl [1]> <cpl [1]>
ETS Report
# A tibble: 5 x 10
symbol .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 CZR ETS 0.195 -1869. 3745. 3745. 3759. 0.195 0.242 0.325
2 DKNG ETS 0.349 -1093. 2198. 2198. 2223. 0.345 0.383 0.409
3 MGM ETS 0.106 -992. 1989. 1989. 2002. 0.106 0.134 0.256
4 PDYPY ETS 0.343 -388. 782. 782. 792. 0.340 0.372 0.444
5 PENN ETS 0.185 -2088. 4182. 4182. 4196. 0.184 0.219 0.320
Combo Reports
Series: volume
Model: COMBINATION
Combination: (volume + volume) * 0.5
====================================
Series: volume + volume
Model: COMBINATION
Combination: volume + volume
============================
Series: volume
Model: ARIMA(2,1,1)
Transformation: log(volume + 0.1)
Coefficients:
ar1 ar2 ma1
0.3849 0.1206 -0.9037
s.e. 0.0452 0.0418 0.0252
sigma^2 estimated as 0.187: log likelihood=-433.68
AIC=875.36 AICc=875.41 BIC=893.83
Series: volume
Model: ETS(A,N,N)
Transformation: log(volume + 0.1)
Smoothing parameters:
alpha = 0.4455517
Initial states:
l
13.81917
sigma^2: 0.1955
AIC AICc BIC
3744.825 3744.857 3758.685
Series: volume
Model: COMBINATION
Combination: (volume + volume) * 0.5
====================================
Series: volume + volume
Model: COMBINATION
Combination: volume + volume
============================
Series: volume
Model: ARIMA(1,1,1)
Transformation: log(volume + 0.1)
Coefficients:
ar1 ma1
0.2894 -0.8094
s.e. 0.0595 0.0414
sigma^2 estimated as 0.1802: log likelihood=-465.32
AIC=936.65 AICc=936.68 BIC=950.81
Series: volume
Model: ETS(A,N,N)
Transformation: log(volume + 0.1)
Smoothing parameters:
alpha = 0.3911035
Initial states:
l
14.44588
sigma^2: 0.1849
AIC AICc BIC
4181.696 4181.725 4195.861
Series: volume
Model: COMBINATION
Combination: (volume + volume) * 0.5
====================================
Series: volume + volume
Model: COMBINATION
Combination: volume + volume
============================
Series: volume
Model: ARIMA(3,1,2)
Transformation: log(volume + 0.1)
Coefficients:
ar1 ar2 ar3 ma1 ma2
-0.4912 0.3812 -0.0210 0.0230 -0.7053
s.e. 0.0948 0.0852 0.0584 0.0842 0.0788
sigma^2 estimated as 0.1001: log likelihood=-131.64
AIC=275.29 AICc=275.46 BIC=300.56
Series: volume
Model: ETS(A,N,N)
Transformation: log(volume + 0.1)
Smoothing parameters:
alpha = 0.4604249
Initial states:
l
15.66385
sigma^2: 0.106
AIC AICc BIC
1989.222 1989.271 2001.866
Series: volume
Model: COMBINATION
Combination: (volume + volume) * 0.5
====================================
Series: volume + volume
Model: COMBINATION
Combination: volume + volume
============================
Series: volume
Model: ARIMA(1,1,1)
Transformation: log(volume + 0.1)
Coefficients:
ar1 ma1
0.1042 -0.6131
s.e. 0.0804 0.0616
sigma^2 estimated as 0.3803: log likelihood=-406.12
AIC=818.25 AICc=818.3 BIC=830.47
Series: volume
Model: ETS(A,Ad,N)
Transformation: log(volume + 0.1)
Smoothing parameters:
alpha = 0.2644703
beta = 0.000100007
phi = 0.9215727
Initial states:
l b
7.343845 0.7080567
sigma^2: 0.3491
AIC AICc BIC
2198.038 2198.233 2222.503
Series: volume
Model: COMBINATION
Combination: (volume + volume) * 0.5
====================================
Series: volume + volume
Model: COMBINATION
Combination: volume + volume
============================
Series: volume
Model: ARIMA(1,0,3) w/ mean
Transformation: log(volume + 0.1)
Coefficients:
ar1 ma1 ma2 ma3 constant
-0.3563 0.6447 0.3890 0.2351 13.5685
s.e. 0.2562 0.2501 0.1118 0.0800 0.0920
sigma^2 estimated as 0.3184: log likelihood=-155.94
AIC=323.89 AICc=324.35 BIC=343.27
Series: volume
Model: ETS(A,N,N)
Transformation: log(volume + 0.1)
Smoothing parameters:
alpha = 0.2256983
Initial states:
l
10.11015
sigma^2: 0.3433
AIC AICc BIC
782.2935 782.4246 791.9868
Model Selection
Models for CZR
# A tibble: 4 x 11
symbol .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 CZR Combo Traini… 2.63e5 2.70e6 1.07e6 -8.31 32.2 0.838 0.802 -0.0221
2 CZR ARIMA Traini… 3.00e5 2.71e6 1.07e6 -7.85 31.8 0.837 0.803 -0.0273
3 CZR ARIMAD1 Traini… 3.00e5 2.71e6 1.07e6 -7.85 31.8 0.837 0.803 -0.0273
4 CZR ETS Traini… 2.25e5 2.72e6 1.09e6 -8.77 33.1 0.857 0.806 -0.00502
Comparing CZR Models
CZR ARIMA
# 14-step ARIMA forecast of CZR volume, overlaid on the observed series.
fit %>%
  select(ARIMA) %>%
  filter(symbol == "CZR") %>%
  forecast(h = 14) %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "CZR") %>%
      select(volume)
  ) +
  # Upper limit is NA so ggplot2 takes it from the data. The previous fixed
  # upper bound of 780 drops everything beyond it, and the printed test set
  # already has trading_day values above 830.
  xlim(400, NA) +
  labs(
    x = "Trading Days",
    y = "Trade Volume",
    title = "Forecasting CZR Trading Volume",
    subtitle = "ARIMA: Forecasting 14 Days"
  ) +
  theme_minimal()
CZR Combo
# 14-step Combo (ARIMA + ETS average) forecast of CZR volume, overlaid on the
# observed series.
fit %>%
  select(Combo) %>%
  filter(symbol == "CZR") %>%
  forecast(h = 14) %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "CZR") %>%
      select(volume)
  ) +
  ylim(0, 2.5e+07) +
  # Upper limit is NA so ggplot2 takes it from the data. The previous fixed
  # upper bound of 780 drops everything beyond it, and the printed test set
  # already has trading_day values above 830.
  xlim(400, NA) +
  labs(
    x = "Trading Days",
    y = "Trade Volume",
    title = "Forecasting CZR Trading Volume",
    subtitle = "ETS + ARIMA: Forecasting 14 Days"
  ) +
  theme_minimal()
CZR Other Models
- Residuals are pretty crazy for both, comparing the two models (ARIMA and Combo (ARIMA + ETS))
- Not normally distributed (outliers)
- For ARIMA the acf looks more like white noise with one lag outside of the bounds
- While Combo has multiple lags outside the bounds
Models for MGM
# A tibble: 4 x 11
symbol .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 MGM ARIMA Training 453340. 5.12e6 3.22e6 -5.07 25.7 0.877 0.877 0.0356
2 MGM ARIMAD1 Training 453340. 5.12e6 3.22e6 -5.07 25.7 0.877 0.877 0.0356
3 MGM Combo Training 401612. 5.14e6 3.23e6 -5.24 25.8 0.878 0.879 0.0495
4 MGM ETS Training 349883. 5.23e6 3.28e6 -5.42 26.3 0.892 0.896 0.0704
Comparing MGM Models
MGM ARIMA Model
# 14-step ARIMA forecast of MGM volume, overlaid on the observed series.
fit %>%
  select(ARIMA) %>%
  filter(symbol == "MGM") %>%
  forecast(h = 14) %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "MGM") %>%
      select(volume)
  ) +
  # Upper limit is NA so ggplot2 takes it from the data; a fixed upper bound
  # of 780 drops forecast points that lie beyond it.
  xlim(400, NA) +
  labs(
    x = "Trading Days",
    y = "Trade Volume",
    title = "Forecasting MGM Trading Volume",
    subtitle = "ARIMA: Forecasting 14 Days"
  ) +
  theme_minimal()

## Checking Residuals
fit %>%
  select(ARIMA) %>%
  filter(symbol == "MGM") %>%
  gg_tsresiduals() +
  labs(title = "ARIMA Model Fit")
MGM Combo Model
# 14-step Combo (ARIMA + ETS average) forecast of MGM volume, overlaid on the
# observed series.
fit %>%
  select(Combo) %>%
  filter(symbol == "MGM") %>%
  forecast(h = 14) %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "MGM") %>%
      select(volume)
  ) +
  # Upper limit is NA so ggplot2 takes it from the data; a fixed upper bound
  # of 780 drops forecast points that lie beyond it.
  xlim(400, NA) +
  labs(
    x = "Trading Days",
    y = "Trade Volume",
    title = "Forecasting MGM Trading Volume",
    subtitle = "ETS + ARIMA: Forecasting 14 Days"
  ) +
  theme_minimal()

## Checking Residuals
fit %>%
  filter(symbol == "MGM") %>%
  select(Combo) %>%
  gg_tsresiduals() +
  labs(title = "ETS + ARIMA Model Fit")
Models for PENN
# A tibble: 4 x 11
symbol .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 PENN Combo Training 315370. 3.04e6 1.49e6 -8.13 31.7 0.861 0.871 0.107
2 PENN ETS Training 288232. 3.04e6 1.50e6 -8.29 32.0 0.868 0.873 0.136
3 PENN ARIMA Training 342508. 3.05e6 1.49e6 -7.96 31.8 0.865 0.877 0.0877
4 PENN ARIMAD1 Training 342508. 3.05e6 1.49e6 -7.96 31.8 0.865 0.877 0.0877
Comparing PENN Models
PENN ARIMA Model
# 14-step ARIMA forecast of PENN volume, overlaid on the observed series.
fit %>%
  select(ARIMA) %>%
  filter(symbol == "PENN") %>%
  forecast(h = 14) %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "PENN") %>%
      select(volume)
  ) +
  # Upper limit is NA so ggplot2 takes it from the data; a fixed upper bound
  # of 780 drops forecast points that lie beyond it.
  xlim(400, NA) +
  labs(
    x = "Trading Days",
    y = "Trade Volume",
    title = "Forecasting PENN Trading Volume",
    subtitle = "ARIMA: Forecasting 14 Days"
  ) +
  theme_minimal()

## Checking Residuals
fit %>%
  select(ARIMA) %>%
  filter(symbol == "PENN") %>%
  gg_tsresiduals() +
  labs(title = "ARIMA Model Fit")
PENN Combo Model
# 14-step Combo (ARIMA + ETS average) forecast of PENN volume, overlaid on
# the observed series.
fit %>%
  select(Combo) %>%
  filter(symbol == "PENN") %>%
  forecast(h = 14) %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "PENN") %>%
      select(volume)
  ) +
  # Upper limit is NA so ggplot2 takes it from the data; a fixed upper bound
  # of 780 drops forecast points that lie beyond it.
  xlim(400, NA) +
  labs(
    x = "Trading Days",
    y = "Trade Volume",
    title = "Forecasting PENN Trading Volume",
    subtitle = "ETS + ARIMA: Forecasting 14 Days"
  ) +
  theme_minimal()

## Checking Residuals
fit %>%
  filter(symbol == "PENN") %>%
  select(Combo) %>%
  gg_tsresiduals() +
  labs(title = "ETS + ARIMA Model Fit")
Models for PDYPY
# A tibble: 4 x 11
symbol .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 PDYPY ARIMA Training 4616. 22295. 11959. -14.5 43.5 0.780 0.794 0.0371
2 PDYPY Combo Training 4337. 22333. 12048. -15.4 43.9 0.785 0.796 0.0597
3 PDYPY ARIMAD1 Training 4539. 22441. 11993. -13.7 42.9 0.782 0.799 0.0399
4 PDYPY ETS Training 4058. 22723. 12696. -16.3 46.4 0.828 0.809 0.104
Comparing PDYPY Models
PDYPY ARIMA Model
# 14-step ARIMA forecast of PDYPY (FanDuel) volume, overlaid on the observed
# series.
fit %>%
  select(ARIMA) %>%
  filter(symbol == "PDYPY") %>%
  forecast(h = 14) %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "PDYPY") %>%
      select(volume)
  ) +
  # Upper limit is NA so ggplot2 takes it from the data; a fixed upper bound
  # of 780 drops forecast points that lie beyond it.
  xlim(625, NA) +
  labs(
    x = "Trading Days",
    y = "Trade Volume",
    title = "Forecasting FanDuel Trading Volume",
    subtitle = "ARIMA: Forecasting 14 Days"
  ) +
  theme_minimal()

## Checking Residuals
fit %>%
  select(ARIMA) %>%
  filter(symbol == "PDYPY") %>%
  gg_tsresiduals() +
  labs(title = "ARIMA Model Fit")
PDYPY Combo Model
# 14-step Combo (ARIMA + ETS average) forecast of PDYPY (FanDuel) volume,
# overlaid on the observed series.
fit %>%
  select(Combo) %>%
  filter(symbol == "PDYPY") %>%
  forecast(h = 14) %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "PDYPY") %>%
      select(volume)
  ) +
  # Upper limit is NA so ggplot2 takes it from the data; a fixed upper bound
  # of 780 drops forecast points that lie beyond it.
  xlim(625, NA) +
  labs(
    x = "Trading Days",
    y = "Trade Volume",
    title = "Forecasting FanDuel Trading Volume",
    subtitle = "ETS + ARIMA: Forecasting 14 Days"
  ) +
  theme_minimal()

## Checking Residuals
fit %>%
  filter(symbol == "PDYPY") %>%
  select(Combo) %>%
  gg_tsresiduals() +
  labs(title = "ETS + ARIMA Model Fit")
Models for DKNG
# A tibble: 4 x 11
symbol .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 DKNG ARIMA Training 9.12e5 8.16e6 4.72e6 -10.7 40.0 0.860 0.867 0.0525
2 DKNG ARIMAD1 Training 9.12e5 8.16e6 4.72e6 -10.7 40.0 0.860 0.867 0.0525
3 DKNG Combo Training 1.03e6 8.21e6 4.73e6 -15.2 43.4 0.862 0.871 0.140
4 DKNG ETS Training 1.15e6 8.36e6 4.89e6 -19.6 48.1 0.890 0.888 0.234
Comparing DKNG Models
DKNG ARIMA
# 14-step ARIMA forecast of DKNG volume, overlaid on the observed series.
fit %>%
  select(ARIMA) %>%
  filter(symbol == "DKNG") %>%
  forecast(h = 14) %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "DKNG") %>%
      select(volume)
  ) +
  # Upper limit is NA so ggplot2 takes it from the data; a fixed upper bound
  # of 780 drops forecast points that lie beyond it.
  xlim(500, NA) +
  labs(
    x = "Trading Days",
    y = "Trade Volume",
    title = "Forecasting DraftKings Trading Volume",
    subtitle = "ARIMA: Forecasting 14 Days"
  ) +
  theme_minimal()

## Checking Residuals
fit %>%
  select(ARIMA) %>%
  filter(symbol == "DKNG") %>%
  gg_tsresiduals() +
  labs(title = "ARIMA Model Fit")
DKNG Combo Model
# 14-step Combo (ARIMA + ETS average) forecast of DKNG volume, overlaid on
# the observed series.
fit %>%
  select(Combo) %>%
  filter(symbol == "DKNG") %>%
  forecast(h = 14) %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "DKNG") %>%
      select(volume)
  ) +
  # Upper limit is NA so ggplot2 takes it from the data; a fixed upper bound
  # of 780 drops forecast points that lie beyond it.
  xlim(500, NA) +
  labs(
    x = "Trading Days",
    y = "Trade Volume",
    title = "Forecasting DraftKings Trading Volume",
    subtitle = "ETS + ARIMA: Forecasting 14 Days"
  ) +
  theme_minimal()

## Checking Residuals
fit %>%
  filter(symbol == "DKNG") %>%
  select(Combo) %>%
  gg_tsresiduals() +
  labs(title = "ETS + ARIMA Model Fit")
Best Model Fits
# Create fctest: forecast 14 periods out with every model, score the
# forecasts against the test set, and keep the lowest-RMSE model per symbol.
# slice_min() already orders by RMSE within each symbol, so no separate
# arrange() step is needed. `FALSE` is written out because `F` can be
# reassigned. The result stays grouped by symbol.
fctest <- fit %>%
  forecast(h = 14) %>%
  accuracy(test) %>%
  group_by(symbol) %>%
  slice_min(
    RMSE,
    n = 1,
    with_ties = FALSE
  )

# Best models for each Symbol
fctest
# A tibble: 5 x 11
# Groups: symbol [5]
.model symbol .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ARIMA CZR Test -746445. 1.28e6 1.13e6 -51.5 58.2 NaN NaN -0.0568
2 ETS DKNG Test -5598333. 6.50e6 5.79e6 -67.5 68.7 NaN NaN 0.266
3 ETS MGM Test 223964. 2.19e6 1.52e6 -5.58 23.0 NaN NaN 0.284
4 ARIMA PDYPY Test -7121. 1.01e4 9.22e3 -58.5 64.8 NaN NaN -0.0270
5 ARIMA PENN Test -61103. 1.12e6 9.59e5 -8.37 23.2 NaN NaN 0.238
# Creating model.table based on the fit: for each row of fctest, pull the
# winning model column for that symbol out of `fit`.
# - seq_len(nrow(fctest)) replaces a hard-coded 1:5, so the lookup follows
#   however many symbols fctest contains.
# - lapply() + unlist(recursive = FALSE) replaces sapply(): the shape no
#   longer depends on sapply's simplification rules. It yields a list named
#   by the model columns, matching the printed output.
# - all_of() marks the model name as an external character value.
model.table <- lapply(
  seq_len(nrow(fctest)),
  function(i) {
    fit %>%
      filter(symbol == fctest$symbol[[i]]) %>%
      select(all_of(fctest$.model[[i]]))
  }
) %>%
  unlist(recursive = FALSE)
model.table
$ARIMA
<lst_mdl[1]>
[1] <ARIMA(2,1,1)>
$ETS
<lst_mdl[1]>
[1] <ETS(A,Ad,N)>
$ETS
<lst_mdl[1]>
[1] <ETS(A,N,N)>
$ARIMA
<lst_mdl[1]>
[1] <ARIMA(1,0,3) w/ mean>
$ARIMA
<lst_mdl[1]>
[1] <ARIMA(1,1,1)>
Forecasting Test Data
# A mable: 5 x 5
# Key: symbol [5]
symbol ARIMA ARIMAD1 ETS Combo
<chr> <model> <model> <model> <model>
1 CZR <ARIMA(0,0,0) w/ mean> <ARIMA(1,1,0)> <ETS(A,N,N)> <COMBINATION>
2 DKNG <ARIMA(0,0,0) w/ mean> <ARIMA(0,1,1)> <ETS(A,N,N)> <COMBINATION>
3 MGM <ARIMA(0,0,0) w/ mean> <ARIMA(1,1,0)> <ETS(M,N,N)> <COMBINATION>
4 PDYPY <ARIMA(0,0,0) w/ mean> <ARIMA(0,1,0)> <ETS(M,N,N)> <COMBINATION>
5 PENN <ARIMA(0,0,0) w/ mean> <ARIMA(1,1,0)> <ETS(A,N,N)> <COMBINATION>
CZR Forecasts and Plot
# 14-step forecasts for CZR from every model in testfit.
# NOTE(review): testfit is defined outside the visible code - confirm what
# data it was fitted on.
# NOTE(review): the original comment calls ETS the best model for CZR, but
# the printed fctest table lists ARIMA as lowest test RMSE - confirm.
CZR_best <- testfit %>%
  filter(symbol == "CZR") %>%
  forecast(h = 14)

## Plot the ETS forecast on top of the observed CZR volume
# NOTE(review): the y label says "(Log)" - confirm the plotted values are
# actually on a log scale.
CZR_best %>%
  filter(.model == "ETS") %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "CZR") %>%
      select(volume)
  ) +
  labs(
    title = "Forecasting CZR Trading Volume",
    subtitle = "Best Model - ETS: Forecasting 14 Days",
    x = "Trading Days",
    y = "Trade Volume (Log)"
  ) +
  theme_minimal()
DKNG Forecasts and Plot
# 14-step forecasts for DKNG from every model in testfit; the ETS forecast is
# the one plotted below.
# NOTE(review): testfit is defined outside the visible code - confirm what
# data it was fitted on.
DKNG_best <- testfit %>%
  filter(symbol == "DKNG") %>%
  forecast(h = 14)

# Plot the ETS forecast on top of the observed DKNG volume
# NOTE(review): the y label says "(Log)" - confirm the plotted values are
# actually on a log scale.
DKNG_best %>%
  filter(.model == "ETS") %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "DKNG") %>%
      select(volume)
  ) +
  labs(
    title = "Forecasting DraftKings Trading Volume",
    subtitle = "Best Model - ETS: Forecasting 14 Days",
    x = "Trading Days",
    y = "Trade Volume (Log)"
  ) +
  theme_minimal()
MGM Forecasts and Plot
# 14-step forecasts for MGM from every model in testfit.
# NOTE(review): testfit is defined outside the visible code - confirm what
# data it was fitted on.
# NOTE(review): the original comment calls ARIMA the best model for MGM, but
# the printed fctest table lists ETS as lowest test RMSE - confirm.
MGM_best <- testfit %>%
  filter(symbol == "MGM") %>%
  forecast(h = 14)

# Plot the ARIMA forecast on top of the observed MGM volume
# NOTE(review): the y label says "(Log)" - confirm the plotted values are
# actually on a log scale.
MGM_best %>%
  filter(.model == "ARIMA") %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "MGM") %>%
      select(volume)
  ) +
  labs(
    title = "Forecasting MGM Trading Volume",
    subtitle = "Best Model - ARIMA: Forecasting 14 Days",
    x = "Trading Days",
    y = "Trade Volume (Log)"
  ) +
  theme_minimal()
FanDuel Forecasts and Plot
# 14-step forecasts for PDYPY (FanDuel) from every model in testfit.
# NOTE(review): testfit is defined outside the visible code - confirm what
# data it was fitted on.
# NOTE(review): the original comment calls ETS the best model for FanDuel,
# but the printed fctest table lists ARIMA as lowest test RMSE - confirm.
fanDuel_best <- testfit %>%
  filter(symbol == "PDYPY") %>%
  forecast(h = 14)

# Plot the ETS forecast on top of the observed PDYPY volume
# NOTE(review): the y label says "(Log)" - confirm the plotted values are
# actually on a log scale.
fanDuel_best %>%
  filter(.model == "ETS") %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "PDYPY") %>%
      select(volume)
  ) +
  labs(
    title = "Forecasting FanDuel Trading Volume",
    subtitle = "Best Model - ETS: Forecasting 14 Days",
    x = "Trading Days",
    y = "Trade Volume (Log)"
  ) +
  theme_minimal()
PENN Forecasts and Plot
# 14-step forecasts for PENN from every model in testfit.
# NOTE(review): testfit is defined outside the visible code - confirm what
# data it was fitted on.
# NOTE(review): the original comment calls ETS the best model for PENN, but
# the printed fctest table lists ARIMA as lowest test RMSE - confirm.
PENN_best <- testfit %>%
  filter(symbol == "PENN") %>%
  forecast(h = 14)

# Plot the ETS forecast on top of the observed PENN volume
# NOTE(review): the y label says "(Log)" - confirm the plotted values are
# actually on a log scale.
PENN_best %>%
  filter(.model == "ETS") %>%
  autoplot() +
  autolayer(
    Model.Me %>%
      filter(symbol == "PENN") %>%
      select(volume)
  ) +
  labs(
    title = "Forecasting PENN Trading Volume",
    subtitle = "Best Model - ETS: Forecasting 14 Days",
    x = "Trading Days",
    y = "Trade Volume (Log)"
  ) +
  theme_minimal()
Model Proofing
Different way to look at accuracy
# A fable: 280 x 5 [1]
# Key: symbol, .model [20]
symbol .model trading_day volume .mean
<chr> <chr> <dbl> <dist> <dbl>
1 CZR ARIMA 831 t(N(15, 0.19)) 2640954.
2 CZR ARIMA 832 t(N(15, 0.23)) 2727813.
3 CZR ARIMA 833 t(N(15, 0.26)) 2854810.
4 CZR ARIMA 834 t(N(15, 0.28)) 2917556.
5 CZR ARIMA 835 t(N(15, 0.29)) 2961332.
6 CZR ARIMA 836 t(N(15, 0.3)) 2990233.
7 CZR ARIMA 837 t(N(15, 0.31)) 3011203.
8 CZR ARIMA 838 t(N(15, 0.32)) 3027349.
9 CZR ARIMA 839 t(N(15, 0.33)) 3040685.
10 CZR ARIMA 840 t(N(15, 0.33)) 3052357.
# … with 270 more rows
CZR Accuracy
# A tibble: 4 x 11
.model symbol .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ARIMA CZR Test -746445. 1283550. 1.13e6 -51.5 58.2 NaN NaN -0.0568
2 ARIMAD1 CZR Test -746445. 1283550. 1.13e6 -51.5 58.2 NaN NaN -0.0568
3 Combo CZR Test -862122. 1337767. 1.22e6 -57.1 63.3 NaN NaN -0.0976
4 ETS CZR Test -977798. 1401422. 1.31e6 -62.7 68.4 NaN NaN -0.137
PENN Accuracy
# A tibble: 4 x 11
.model symbol .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ARIMA PENN Test -61103. 1120642. 959048. -8.37 23.2 NaN NaN 0.238
2 ARIMAD1 PENN Test -61103. 1120642. 959048. -8.37 23.2 NaN NaN 0.238
3 Combo PENN Test -158967. 1121156. 952601. -10.4 23.5 NaN NaN 0.224
4 ETS PENN Test -256830. 1133383. 946154. -12.5 23.8 NaN NaN 0.212
MGM Accuracy
# A tibble: 4 x 11
.model symbol .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ARIMA MGM Test 301339. 2286054. 1620270. -4.74 24.3 NaN NaN 0.304
2 ARIMAD1 MGM Test 301339. 2286054. 1620270. -4.74 24.3 NaN NaN 0.304
3 Combo MGM Test 262651. 2236477. 1572396. -5.16 23.6 NaN NaN 0.294
4 ETS MGM Test 223964. 2188870. 1524521. -5.58 23.0 NaN NaN 0.284
PDYPY Accuracy
# A tibble: 4 x 11
.model symbol .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ARIMA PDYPY Test -7121. 10077. 9217. -58.5 64.8 NaN NaN -0.0270
2 ARIMAD1 PDYPY Test -9947. 12326. 11368. -75.8 80.1 NaN NaN -0.0211
3 Combo PDYPY Test -9650. 11980. 11021. -73.5 77.6 NaN NaN -0.0183
4 ETS PDYPY Test -12179. 14093. 12825. -88.6 90.5 NaN NaN -0.00453
DKNG Accuracy
# A tibble: 4 x 11
.model symbol .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ARIMA DKNG Test -8176679. 9170314. 8176679. -95.8 95.8 NaN NaN 0.449
2 ARIMAD1 DKNG Test -8176679. 9170314. 8176679. -95.8 95.8 NaN NaN 0.449
3 Combo DKNG Test -6887506. 7815933. 6945703. -81.6 82.0 NaN NaN 0.367
4 ETS DKNG Test -5598333. 6496910. 5791326. -67.5 68.7 NaN NaN 0.266